library(ggplot2)
library(dplyr)
library(tidyr)
library(foreach)
library(doParallel)
library(doRNG)
library(lars)
library(xtable)
library(mboost)

cl <- makeForkCluster(8)
registerDoParallel(cl)
registerDoRNG(1987)

fit_lars <- function(X, y, folds) {
  cv_res <- cv.lars(X, y, folds, type = "lar", mode = "fraction", plot.it = FALSE, intercept = FALSE)
  opt_frac <- min(cv_res$cv)
  opt_frac <- cv_res$index[which(cv_res$cv <= opt_frac)][[1]]
  lasso_path <- lars(X, y, type = "lar", intercept = FALSE)
  predict.lars(lasso_path, type = "coefficients", mode = "fraction", s = opt_frac)
}

mse <- function(x, y) mean((x - y)^2)

## polynomial regression example
## shows over/under fitting and how a method which optimizes
## expected risk can beat optimizers of empirical risk
n <- 100
m <- 1000
sigma <- 1
train <- 1:(n / 2)
test <- (n / 2 + 1):n

out <- foreach(i = 1:m, .combine = "rbind", .packages = c("lars", "dplyr")) %dorng% {
  df <- dplyr::data_frame("x" = runif(n, -5, 5),
                          "fx" = sin(x),
                          "y" = fx + rnorm(n, 0, sigma),
                          "set" = c(rep("train", length(train)), rep("test", length(test))),
                          "iteration" = rep(i, n))
  f1 <- lm(y ~ -1 + x, df[train, ])
  f3 <- lm(y ~ -1 + poly(x, 3), df[train, ])
  f10 <- lm(y ~ -1 + poly(x, 10), df[train, ])
  f10r <- fit_lars(poly(df$x[train], 10), df$y[train], 10)

  df$"degree 1" <- c(f1$fitted, predict(f1, newdata = df[test, ]))
  df$"degree 3" <- c(f3$fitted, predict(f3, newdata = df[test, ]))
  df$"degree 10" <- c(f10$fitted, predict(f10, newdata = df[test, ]))
  df$"degree 10, regularized" <- c(poly(df$x[train], 10) %*% coef(f10r),
                                   poly(df$x[test], 10) %*% coef(f10r))
  df
}

out <- out %>% gather(model, estimate, -one_of("iteration", "y", "x", "fx", "set"))
tab <- out %>% group_by(model, set) %>%
  summarise("risk" = mse(estimate, y)) %>%
  spread(set, risk) %>%
  rename("expected risk" = test, "empirical risk" = train)
tab_add <- out %>% group_by(model) %>%
  summarise("excess risk" = mse(estimate, fx), "Bayes risk" = mse(fx, y)) %>%
  select(-one_of("model"))
tab <- data.frame(tab, tab_add, check.names = FALSE)
tab <- tab[c(1, 4, 2, 3), ]
row.names(tab) <- tab$model
tab$model <- NULL
xtable(tab, digits = 2)

pout <- out %>% filter(iteration %in% sample(1:m, size = 75) & set == "train")
pout$model <- factor(pout$model, levels = c("degree 1", "degree 3", "degree 10", "degree 10, regularized"))
p <- ggplot(pout, aes(x, y)) +
  geom_point(alpha = .05) +
  facet_wrap(~ model) +
  geom_line(aes(x, estimate, group = iteration), alpha = .2, colour = "red") +
  stat_smooth(colour = "blue", method = "loess", se = FALSE, size = 1, span = .25) +
  labs(x = expression(list(X %~% Unif(-5, 5), n == 100)),
       y = expression(list(Y == sin(X) + epsilon, epsilon %~% N(0, 1)))) +
  theme_bw() +
  coord_cartesian(xlim = c(-5, 5), ylim = c(-2, 2))
ggsave("sine.png", p, width = 12, height = 8)

## learning curve example
## generate data and show the divergance of the train and test error

k <- seq(0, 1, length.out = 50)
out <- foreach(i = 1:m, .combine = "rbind", .packages = c("mboost", "dplyr")) %dorng% {
  df <- data_frame("x" = runif(n, -5, 5),
                   "y" = sin(x) + rnorm(n))
  out <- lapply(k, function(z) {
    fit <- mboost(y ~ x, df[1:(n/2), ], "btree",
                  control = boost_control(mstop = 100, nu = z, risk = "none"))
    c(mse(predict(fit)[, 1], df$y[1:(n/2)]),
      mse(predict(fit, newdata = as.data.frame(df[(n/2 + 1):n, ]))[, 1], df$y[(n/2 + 1):n]))
  })
  out <- do.call("rbind", out)
  out <- as.data.frame(out)
  colnames(out) <- c("train", "test")
  out$iteration <- rep(i, length(k))
  out$k <- k
  out
}

stopCluster(cl)

out <- gather(out, set, error, -one_of("iteration", "k"))
out <- out %>% group_by(k, set) %>% summarise(error = mean(error))
out <- rbind(as.data.frame(out), data.frame(k, set = rep("bayes", length(k)), error = rep(1, length(k))))
best <- out$k[which(out$error == min(out$error[out$set == "test"]))]

p <- ggplot(out, aes(k, error, group = set, colour = set)) +
  geom_line() +
  scale_colour_manual(name = "", breaks = c("train", "test", "bayes"),
                      labels = c("Empirical Risk (training error)",
                                 "Expected Risk (test error)",
                                 "Bayes Risk (minimal error)"),
                      values = c("red", "blue", "black")) +
  geom_vline(aes(xintercept = best), linetype = "dashed") + 
  labs(x = expression(paste("Complexity (", nu, ")")), y = expression(list("MSE", n == 100))) +
  theme_bw()
ggsave("learning.png", p, width = 10, height = 6)
